library(tidyverse)
library(readxl)
library(factoextra)
library(viridis)
library(vegan)
library(rpart)
library(partykit)
library(rpart.plot)
library(ggrepel)
library(lubridate)
library(ggpubr)
library(knitr)
# Read the study-site physiography tables (one sheet of static lake variables)
physio_path <- file.path("..", "data", "analysis_outputs",
                         "study-site-tables.xlsx")
physio <- read_excel(path = physio_path)
# Grab bigjoin for land cover and other per-lake static covariates, then
# average them per (park, site). Aspect is re-attached afterwards because it
# is non-numeric and cannot go through mean().
bigjoin <- readRDS(file = "../data/analysis_outputs/bigjoin.rds")
bigjoin_subset <- bigjoin %>%
  select(park_code, site_code, solar_jas, SWE_May, flush_index_noSWE,
         forest:snowice) %>%
  # distinct() replaces unique(); same result, clearer intent
  distinct() %>%
  group_by(park_code, site_code) %>%
  # summarize_all() is superseded; across() is the current idiom.
  # .groups = "drop_last" is the default, stated to silence the message.
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)),
            .groups = "drop_last") %>%
  left_join(x = ., y = distinct(select(bigjoin, site_code, Aspect)),
            by = c("site_code"))
# Name matching table
# Name matching table: maps old lake names to current site codes
match_sites <- readRDS(
  file = file.path("..", "data", "name_site_matches.rds")
)
# Select the static variables that go into the analyses
# Select the continuous physiography variables, translate lake names to site
# codes, and attach the averaged land-cover / climate covariates
physio_subset <- physio %>%
  select(Park_code, Lake, Elevation_m, Watershed_area_to_lake_area_ratio,
         Surface_area_ha, Watershed_area_ha, Depth_mean_m, Volume_m3,
         BlueLineOutlet) %>%
  left_join(match_sites, by = c("Lake" = "old_name")) %>%
  select(Park_code, site_code, everything(), -Lake) %>%
  full_join(bigjoin_subset,
            by = c("Park_code" = "park_code", "site_code"))
# Scale a numeric subset of the data
# Z-score the numeric variables; keep Park_code alongside for grouping/hulls.
# NOTE: scale() returns a one-column matrix, so these become matrix columns
# (same as the original mutate_if behavior).
physio_scale <- physio_subset %>%
  select(Elevation_m:snowice) %>%
  # mutate_if() is superseded; across(where(...)) is the current idiom
  mutate(across(where(is.numeric), ~ scale(.x))) %>%
  cbind(Park_code = physio_subset$Park_code, .)
# Row names label the PCA points as "PARK_site"
row.names(physio_scale) <- paste0(physio_subset$Park_code,
                                  "_", physio_subset$site_code)
# PCA via vegan::rda() on complete cases of the scaled numeric variables.
# select_if() is superseded; select(where(...)) is the current idiom.
physio_pca <- rda(na.omit(select(physio_scale, where(is.numeric))))
physio_pca
## Call: rda(X = na.omit(select_if(.tbl = physio_scale, .predicate =
## is.numeric)))
##
## Inertia Rank
## Total 14
## Unconstrained 14 14
## Inertia is variance
##
## Eigenvalues for unconstrained axes:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13
## 3.993 3.613 2.054 1.625 0.889 0.552 0.515 0.330 0.248 0.070 0.057 0.031 0.016
## PC14
## 0.005
# Add hulls (currently for park)
# https://rpubs.com/brouwern/veganpca#:~:text=PCA%20(Principal%20Components%20Analysis)%20is,is%20also%20developing%20biplot%20tools).
# Biplot of sites (text) and variables ("species", points)
biplot(physio_pca,
       display = c("sites", "species"),
       type = c("text", "points"))
# Overlay one convex hull per park.
# `groups` spelled out in full (was `group`, which relied on partial
# argument matching).
ordihull(physio_pca,
         groups = physio_scale$Park_code)
# Same PCA via prcomp(); the data were already scaled above, hence
# scale. = FALSE. select_if() is superseded; select(where(...)) is current.
physio_pca <- prcomp(x = select(physio_scale, where(is.numeric)),
                     scale. = FALSE)
physio_pca
## Standard deviations (1, .., p=14):
## [1] 1.99832728 1.90086537 1.43332969 1.27465404 0.94303813 0.74290374
## [7] 0.71778513 0.57484149 0.49816889 0.26426417 0.23940289 0.17599454
## [13] 0.12835909 0.06756623
##
## Rotation (n x k) = (14 x 14):
## PC1 PC2 PC3
## Elevation_m 0.098718984 0.23475184 -0.34416855
## Watershed_area_to_lake_area_ratio 0.449796087 -0.17888412 0.06708688
## Surface_area_ha -0.320374605 -0.37868613 -0.04928645
## Watershed_area_ha 0.152014455 -0.38729584 0.09538898
## Depth_mean_m -0.314680520 -0.13136245 -0.30725730
## Volume_m3 -0.318831925 -0.33904928 -0.13481002
## solar_jas -0.174628564 0.31506057 -0.11295205
## SWE_May 0.108484795 -0.23484318 -0.19312744
## flush_index_noSWE 0.444641918 -0.17602530 0.10130398
## forest -0.207463643 0.22041638 0.46708998
## shrub -0.226003703 -0.33578277 0.02934696
## meadow 0.094780455 -0.36965147 0.21862863
## barren 0.344040834 0.02483731 -0.38294310
## snowice -0.008680226 -0.05734407 -0.52833824
## PC4 PC5 PC6
## Elevation_m 0.41258776 -0.31740973 0.33363865
## Watershed_area_to_lake_area_ratio -0.11522654 0.07388409 0.16652979
## Surface_area_ha 0.04503952 -0.02927842 0.23142526
## Watershed_area_ha 0.21859443 0.14297311 0.44632710
## Depth_mean_m -0.35718999 0.11414038 0.18644067
## Volume_m3 -0.13802930 -0.09973187 0.30233727
## solar_jas 0.48473494 0.18402586 0.15053886
## SWE_May 0.28475866 0.74122416 -0.12510095
## flush_index_noSWE -0.12700007 0.03565136 0.06481845
## forest -0.21198410 0.13492904 0.36129182
## shrub 0.36797393 -0.23266276 -0.23461962
## meadow 0.16343962 -0.36613960 -0.24178192
## barren -0.09451794 -0.24024043 0.31004156
## snowice -0.26809072 0.02929043 -0.31518814
## PC7 PC8 PC9
## Elevation_m -0.07616023 0.08100936 -0.57893303
## Watershed_area_to_lake_area_ratio -0.04847590 -0.11552086 -0.07604728
## Surface_area_ha 0.09206120 -0.21046271 0.08495574
## Watershed_area_ha 0.48202756 0.02421180 0.12884070
## Depth_mean_m -0.20906331 0.43722376 -0.01275448
## Volume_m3 -0.18023234 -0.06780204 -0.23499543
## solar_jas 0.25123441 0.11776102 0.20089734
## SWE_May -0.24952486 0.15329918 -0.08765963
## flush_index_noSWE -0.03546764 -0.15681991 -0.34007720
## forest 0.21615457 0.02549403 -0.13505252
## shrub -0.14145797 -0.41986698 0.07583023
## meadow 0.15021705 0.69774804 -0.02454281
## barren -0.13544794 0.01670222 0.59789292
## snowice 0.66373859 -0.11584307 -0.18462463
## PC10 PC11 PC12
## Elevation_m 0.08851658 -0.110103587 0.205424087
## Watershed_area_to_lake_area_ratio 0.02760596 0.495118264 -0.151998547
## Surface_area_ha -0.29885911 0.011215411 0.094717801
## Watershed_area_ha 0.37964207 -0.336082768 -0.119946146
## Depth_mean_m 0.53470482 0.269820102 -0.001871218
## Volume_m3 -0.43727170 -0.001883573 -0.292924661
## solar_jas -0.14821476 0.544681169 -0.315502190
## SWE_May -0.19191584 -0.082498641 0.326740012
## flush_index_noSWE -0.01224291 0.274909842 -0.052099875
## forest -0.09816626 0.202598487 0.593683679
## shrub 0.35071759 0.325059658 0.316915890
## meadow -0.21504272 0.133869628 0.114302137
## barren -0.19400039 0.037022736 0.350847247
## snowice -0.07330691 0.108584281 0.165712590
## PC13 PC14
## Elevation_m -0.006973226 -0.157066163
## Watershed_area_to_lake_area_ratio 0.374863403 -0.532641187
## Surface_area_ha -0.549383218 -0.484624706
## Watershed_area_ha 0.108319969 0.132759306
## Depth_mean_m -0.142039730 -0.003808667
## Volume_m3 0.398023291 0.342155721
## solar_jas -0.082231367 0.153925841
## SWE_May 0.072913414 0.021677342
## flush_index_noSWE -0.544157066 0.472646941
## forest 0.138575947 0.092570128
## shrub 0.165214727 0.179668626
## meadow 0.008659048 -0.004745226
## barren 0.022414455 0.181368767
## snowice 0.109608306 0.026963149
# Scree plot
# Scree plot of the PCA eigenvalues (variance explained per component)
fviz_eig(physio_pca)
# Biplot & variable contribution
# PCA biplot: sites (individuals) and variable loadings together
fviz_pca_biplot(X = physio_pca, col.var = viridis(n = 5)[2],
col.ind = plasma(n = 5)[4], ggtheme = theme_bw())
# Variable plot colored by each variable's contribution to the PCs
fviz_pca_var(X = physio_pca, col.var = "contrib", repel = TRUE,
gradient.cols = viridis(n = 5, begin = 0, end = 1),
ggtheme = theme_bw())
# Mean chlorophyll per lake across all sampled years
mean_chl <- bigjoin %>%
  select(park_code, site_code, event_year, variable, value) %>%
  filter(variable == "Chlorophyll") %>%
  # spread() is superseded; pivot_wider() is the current idiom
  pivot_wider(names_from = variable, values_from = value) %>%
  group_by(park_code, site_code) %>%
  # .groups = "drop_last" is the default; stated explicitly to silence the
  # "regrouping output" message
  summarise(mean_chl = mean(Chlorophyll), .groups = "drop_last")
## `summarise()` regrouping output by 'park_code' (override with `.groups` argument)
# Combine the static physiography variables with mean chlorophyll
reg_tree_data <- physio_subset %>%
  full_join(mean_chl,
            by = c("Park_code" = "park_code", "site_code"))
# Specify the tree, setting the minimum number of obs per node before a split to be 10
# Regression tree: mean chlorophyll as a function of static lake/watershed
# covariates. minsplit = 10 requires at least 10 observations in a node
# before a split is attempted.
tree <- rpart(formula = mean_chl ~ Elevation_m + Watershed_area_to_lake_area_ratio +
Depth_mean_m + forest + barren + solar_jas + Volume_m3 +
SWE_May + flush_index_noSWE,
data = reg_tree_data, minsplit = 10)
# partykit rendering with a boxplot of mean_chl in each terminal node
plot(x = as.party(tree),
main = "Chlorophyll",
terminal_panel = node_boxplot)
# Cross-validation results for choosing the complexity parameter
plotcp(tree)
# Prune at cp = 0.025 and plot the pruned tree.
# (A merge had duplicated the plotcp/prune/rpart.plot lines; one copy kept.)
ptree <- prune(tree, cp = 0.025)
rpart.plot(ptree, main = "Average Chlorophyll")
# Write the pruned tree to a PNG. The original opened the device without
# plotting into it or closing it; the plot call and dev.off() are added so
# the file is actually produced and the device is released.
png("NPS_FCA_chl_tree.png", width = 1000, height = 800)
rpart.plot(ptree, main = "Average Chlorophyll")
dev.off()
# Based on the cross-validation plot, the ideal tree size is 1...
# Import Power's data object from his analysis script (v32, ~ line 604)
powers_profile_data <- readRDS(
  file = "../data/analysis_outputs/sp_dataplot.rds"
)
# Split "PARK Site Name" into park and site; extra = "merge" keeps
# multi-word site names intact
powers_profile_data <- powers_profile_data %>%
  separate(col = park_site, into = c("park", "site"),
           sep = " ", extra = "merge")
# Water temperature vs May SWE (SNOTEL), one line per lake, colored by park.
plot_wtempSWE_snotel <- ggplot(data = powers_profile_data,
aes(x = SWE_May_snotel, y = value, group = "black",
color = (park_code), label = short_code)) +
geom_line(aes(group = short_code), alpha = 0.5)+
# Label only rows with snowyrlo_snotel == 1 (presumably flags the low-snow
# year observations -- TODO confirm against the upstream script)
geom_text_repel(data = filter(powers_profile_data, snowyrlo_snotel == 1),
size = 2.5, segment.color = "black", alpha = 1,
segment.size = 0.2, box.padding = 0.1) +
xlab("May SWE (cm), SNOTEL")+
# intToUtf8(176) is the degree sign
ylab(paste0("Water temperature (", intToUtf8(176), "C), top 2m"))+
theme_bw()+
theme(legend.position = "none")+
theme(strip.text.x = element_text(),
axis.title.x = element_text(vjust = -0.5)) +
scale_colour_viridis_d(end = 0.85) +
# Shared x range with the SNODAS panel for side-by-side comparison
xlim(x = c(0, 325))
# Same plot as above but using the SNODAS May SWE estimates on the x-axis.
# NOTE(review): the label filter still uses snowyrlo_snotel (not a SNODAS
# flag) -- presumably intentional so both panels label the same lakes.
plot_wtempSWE_snodas <- ggplot(powers_profile_data,
aes(x = SWE_May_snodas, y = value,
group = "black", color = (park_code),
label = short_code)) +
geom_line(aes(group = short_code), alpha = 0.5)+
geom_text_repel(data = filter(powers_profile_data, snowyrlo_snotel == 1),
size = 2.5, segment.color = "black", alpha = 1,
segment.size = 0.2, box.padding = 0.1) +
xlab("May SWE (cm), SNODAS") +
ylab(paste0("Water temperature (", intToUtf8(176), "C), top 2m"))+
theme_bw() +
theme(legend.position = "none") +
theme(strip.text.x = element_text(),
axis.title.x = element_text(vjust = -0.5)) +
scale_colour_viridis_d(end = 0.85) +
xlim(x = c(0, 325))
# One data frame per lake, keyed by short_code
lake_list <- split(powers_profile_data, f = powers_profile_data$short_code)
# Per-lake regression slope of water temperature on May SWE (SNODAS)
snodas_models <- map_df(.x = lake_list,
                        .f = ~ {
                          swe_slope <- lm(formula = value ~ SWE_May_snodas,
                                          data = .x)$coefficients[2]
                          tibble(park = unique(.x$park),
                                 site = unique(.x$site),
                                 slope = swe_slope)
                        })
# Per-lake regression slope of water temperature on May SWE (SNOTEL)
snotel_models <- map_df(.x = lake_list,
                        .f = ~ {
                          swe_slope <- lm(formula = value ~ SWE_May_snotel,
                                          data = .x)$coefficients[2]
                          tibble(park = unique(.x$park),
                                 site = unique(.x$site),
                                 slope = swe_slope)
                        })
# Recreate SP's plot from actual lm() fits so the regression coefficient
# values shown are accurate:
# https://stackoverflow.com/questions/44865508/using-ggplot2-to-plot-an-already-existing-linear-model
snodas_predict <- map_df(.x = lake_list,
                         .f = ~ {
                           fit_lm <- lm(formula = value ~ SWE_May_snodas,
                                        data = .x)
                           cbind(.x,
                                 fit_source = "snodas",
                                 fit = predict(fit_lm),
                                 slope = round(fit_lm$coefficients[2],
                                               digits = 2))
                         })
# Fitted (not raw) temperature vs SNODAS SWE lines, one per lake;
# labels show "short_code_slope" for rows where snowyrlo_snotel == 0
snodas_predict_plot <- snodas_predict %>%
ggplot(data = .,
aes(x = SWE_May_snodas, y = fit,
group = "black", color = (park_code))) +
geom_line(aes(group = short_code), alpha = 0.5)+
geom_text_repel(data = filter(snodas_predict, snowyrlo_snotel == 0),
size = 2.5, segment.color = "black", alpha = 1,
segment.size = 0.2, box.padding = 0.1,
mapping = aes(label = paste0(short_code, "_", slope))) +
xlab("May SWE (cm), SNODAS") +
ylab(paste0("Water temperature (", intToUtf8(176), "C), top 2m"))+
theme_bw() +
theme(legend.position = "none") +
theme(strip.text.x = element_text(),
axis.title.x = element_text(vjust = -0.5)) +
scale_colour_viridis_d(end = 0.85) +
xlim(x = c(0, 325))
# Per-lake fitted values and rounded slopes from the SNOTEL regressions
snotel_predict <- map_df(.x = lake_list,
                         .f = ~ {
                           fit_lm <- lm(formula = value ~ SWE_May_snotel,
                                        data = .x)
                           cbind(.x,
                                 fit_source = "snotel",
                                 fit = predict(fit_lm),
                                 slope = round(fit_lm$coefficients[2],
                                               digits = 2))
                         })
# Fitted temperature vs SNOTEL SWE lines, one per lake, labeled with slopes
snotel_predict_plot <- snotel_predict %>%
ggplot(data = .,
aes(x = SWE_May_snotel, y = fit,
group = "black", color = (park_code))) +
geom_line(aes(group = short_code), alpha = 0.5)+
geom_text_repel(data = filter(snotel_predict, snowyrlo_snotel == 0),
size = 2.5, segment.color = "black", alpha = 1,
segment.size = 0.2, box.padding = 0.1,
mapping = aes(label = paste0(short_code, "_", slope))) +
xlab("May SWE (cm), snotel") +
ylab(paste0("Water temperature (", intToUtf8(176), "C), top 2m"))+
theme_bw() +
theme(legend.position = "none") +
theme(strip.text.x = element_text(),
axis.title.x = element_text(vjust = -0.5)) +
scale_colour_viridis_d(end = 0.85) +
xlim(x = c(0, 325))
# 2x2 panel: raw lines (left) vs fitted lines (right), SNODAS row then SNOTEL
ggarrange(plot_wtempSWE_snodas, snodas_predict_plot,
plot_wtempSWE_snotel, snotel_predict_plot)
# First join the SNODAS and SNOTEL slope tables, then translate the old site
# names to site codes, then attach the static physiography variables
swe_modeling_data <- snodas_models %>%
  full_join(snotel_models, by = c("park", "site"),
            suffix = c("_snodas", "_snotel")) %>%
  left_join(match_sites, by = c("site" = "old_name")) %>%
  left_join(physio_subset,
            by = c("park" = "Park_code", "site_code"))
# Set up a tree using all variables, but leave out slope_snodas as a predictor
# Regression tree: SNODAS temperature-vs-SWE slope explained by static
# lake/watershed covariates; minsplit = 10 as above
snodas_tree <- rpart(formula = slope_snodas ~ Elevation_m +
Watershed_area_to_lake_area_ratio + Surface_area_ha +
Watershed_area_ha + Depth_mean_m + forest + shrub + meadow +
barren + snowice,
data = swe_modeling_data, minsplit = 10)
plot(x = as.party(snodas_tree),
main = "SNODAS Slopes",
terminal_panel = node_boxplot)
# Cross-validation results for the complexity parameter
plotcp(snodas_tree)
# Regression tree for the SNOTEL slopes with a reduced predictor set.
# Bug fix: the response was slope_snodas, but this tree is named, plotted,
# and interpreted as the SNOTEL tree -- use slope_snotel.
snotel_tree <- rpart(formula = slope_snotel ~ Elevation_m +
Watershed_area_to_lake_area_ratio + Depth_mean_m + forest +
barren,
data = swe_modeling_data, minsplit = 10)
plot(x = as.party(snotel_tree),
main = "SNOTEL Slopes",
terminal_panel = node_boxplot)
plotcp(snotel_tree)
# If the snotel model is in the ballpark, it’s highest elevation
# forested lakes with shallow depths that have the anomalous slopes
# Note that weekly temps are not calculated around a specific point, but based
# on the calendar year. So if we'd like to compare means of weeks centered,
# e.g., on the date a profile measurement was taken, we'll have to change this.
# This might show what it would take.
# Daily temperature summaries, restricted to surface temperature and
# relabeled with the current site codes
daily_temps <- read.csv(
  file = "../data/analysis_outputs/all-daily-temp-summaries.csv",
  stringsAsFactors = FALSE) %>%
  select(obs_year:mean_value) %>%
  filter(measure == "SurfaceTemp") %>%
  left_join(match_sites, by = c("lake" = "old_name")) %>%
  select(park, site_code, everything(), -lake)
# Water-column profile export. col_types is positional and must match the
# 17 columns of this specific export file -- revisit if the export changes.
profile_data <- read_excel(
path = file.path("..",
"data",
"NPS_NCCN_Mtn_Lakes_Exports",
"qs_b364_Water_Column_Profile_Data_20200723_160058.xlsx"),
col_types = c("text", "text", "text", "numeric", "date", "logical", "numeric",
"numeric", "date", "text", "logical", "numeric", "text", "text",
"text", "date", "text"))%>%
left_join(x = ., y = match_sites, by = c("Site_name" = "old_name")) %>%
select(Park_code, site_code, everything(), -Site_code) %>%
# Calendar week of the sampling event, used to match daily temps below
mutate(week_number = week(Start_date))
# Just the keys needed to find matching weeks of daily data
profile_dates <- profile_data %>%
select(Park_code, site_code, Start_date, Event_year, week_number)
# Summarize daily temps by week, only for weeks that match to profile temp
# measurement events (semi_join keeps matching rows without adding columns)
weekly_temps <- daily_temps %>%
  mutate(new_date = ymd(truncated = 1,
                        paste(obs_year, obs_month, obs_day, sep = "-")),
         week_number = week(new_date)) %>%
  semi_join(x = ., y = profile_dates, by = c("park" = "Park_code",
                                             "site_code",
                                             "obs_year" = "Event_year",
                                             "week_number")) %>%
  group_by(park, site_code, obs_year, week_number) %>%
  # .groups = "drop_last" is the default; stated explicitly to silence the
  # "regrouping output" message
  summarise(mean_surf_temp = mean(mean_value), .groups = "drop_last")
## `summarise()` regrouping output by 'park', 'site_code', 'obs_year' (override with `.groups` argument)
# Pull the profile temp vals from bigjoin (where they've been aggregated already)
# Profile temperatures (top 2 m) from bigjoin, tagged with the week of
# each measurement
profile_temps <- bigjoin %>%
  filter(variable == "ProfTemp_top2m") %>%
  select(park_code, site_code, start_date, event_year, variable, value) %>%
  mutate(week_number = week(start_date))
# Join daily and profile temps based on week in which they occurred
joined_temps <- full_join(x = weekly_temps, y = profile_temps,
                          by = c("park" = "park_code",
                                 "site_code",
                                 "obs_year" = "event_year",
                                 "week_number")) %>%
  # spread() is superseded; pivot_wider() is the current idiom
  pivot_wider(names_from = variable, values_from = value) %>%
  # Remove Hoh, because of lack of continuous data (expected, right?)
  filter(site_code != "Hoh")
# Quick agreement check: profile temps vs weekly logger means, per lake
ggplot(data = joined_temps) +
  geom_point(aes(x = ProfTemp_top2m, y = mean_surf_temp, color = site_code))
## Warning: Removed 12 rows containing missing values (geom_point).
# Same comparison, one panel per lake, with a per-panel lm fit
ggplot(data = joined_temps) +
  geom_point(aes(x = ProfTemp_top2m, y = mean_surf_temp)) +
  geom_smooth(method = "lm", aes(x = ProfTemp_top2m, y = mean_surf_temp),
              se = FALSE) +
  facet_wrap(. ~ site_code)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).